/*******************************************************************************

This code file cleans the NYC property transactions dataset, for use
in measuring underassessment.

*******************************************************************************/

*** Manage settings

	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
	
	set more off
	clear

*** Import transactions data via loop
	
	cd "$data/raw/transactions"

	* List CSVs in local directory
	local files: dir . files "*.xls"

	tempfile tmp
	set obs 1
	gen tmp = .
	save `tmp', replace

	foreach f of local files {

		di "`f'"
		quietly import excel "`f'", firstrow case(lower) allstring clear
		quietly append using `tmp', force
		quietly save `tmp', replace

	}

	
*** Clean transactions data	

	* Drop unneeded/duplicative vars
	drop easement tmp taxclassatpresent buildingclassatpresent taxclassasoffinalroll1819 taxclassasoffinalroll1718 buildingclassasoffinalroll

	destring borough block lot zipcode residentialunits commercialunits totalunits landsquarefeet grosssquarefeet yearbuilt saleprice taxclassattimeofsale, replace force
	
	* Year of sale
	gen saledate_ = date(saledate,"MDY") 
	gen saledate__ = date(saledate,"DMY") 
	gen year = year(saledate_)
	replace year = year(saledate__) if missing(year)
	drop saledate_*

	* Drop blank entries
	drop if missing(borough)

	* Clean BBL code
		
		tostring borough, gen(boro)
		tostring block, gen(block_)
		tostring lot, gen(lot_)

		replace block_ = "0"+block_ if length(block_)<5
		replace block_ = "0"+block_ if length(block_)<5
		replace block_ = "0"+block_ if length(block_)<5
		replace block_ = "0"+block_ if length(block_)<5

		replace lot_ = "0"+lot_ if length(lot_)<4
		replace lot_ = "0"+lot_ if length(lot_)<4
		replace lot_ = "0"+lot_ if length(lot_)<4

		gen bbl = boro+block_+lot_
		destring bbl, replace

		drop block_ lot_ boro

		format bbl %18.0f

	* Keep only residential tax lots

		keep if taxclassattimeofsale == 1 | taxclassattimeofsale == 2
		drop if strpos(buildingclassattimeofsale,"V") > 0

	* Drop property transfers	
	drop if saleprice == 0
	
	foreach v of varlist neighborhood buildingclasscategory apartmentnumber address {
		replace `v' = strtrim(`v')
	}
	
	* Format price variable 
	gen lsaleprice = ln(saleprice)
	
*** Merge into main datafile		
		
	* Merge into PAD xwalk
	merge m:1 bbl using "$data/clean/xwalk_condo_bbl.dta", nogen keep(1 3)
	
*** Save datafile	

	save "$data/clean/all_transactions.dta", replace
